library(p8105.datasets)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.5 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Load the NYC restaurant inspection data set.
data("rest_inspec")

# Box plots of inspection scores by borough, one animation frame per
# inspection year (2014 onwards).
rest_inspec %>%
  janitor::clean_names() %>%
  # `=` (not `<-`) is required inside mutate(): with `<-` the argument is
  # unnamed, so mutate() adds a new column named after the whole expression
  # and leaves `inspection_date` as POSIXct. The date filter below would then
  # compare a POSIXct column against a Date and not filter as intended.
  mutate(inspection_date = as.Date(inspection_date)) %>%
  filter(
    inspection_date >= as.Date("2014-01-01"),
    # `score` is numeric; comparing it with "Missing" only ever dropped NAs.
    !is.na(score),
    boro != "Missing"
  ) %>%
  mutate(year = year(inspection_date)) %>%
  plot_ly(
    x = ~boro, y = ~score, type = "box",
    color = ~boro, frame = ~year, alpha = 0.5
  ) %>%
  layout(
    title = "The Distribution of Inspection Score",
    xaxis = list(title = "Borough"),
    yaxis = list(title = "Each Restaurant's Score")
  )
# Box plots of each restaurant's mean inspection score (inspections from
# 2015 onwards), compared across boroughs.
rest_inspec %>%
  janitor::clean_names() %>%
  # `=` (not `<-`): with `<-` mutate() creates a new, oddly named column and
  # `inspection_date` stays POSIXct, which breaks the Date comparison below.
  mutate(inspection_date = as.Date(inspection_date)) %>%
  filter(
    inspection_date >= as.Date("2015-01-01"),
    # `score` is numeric; the old `!= "Missing"` test only removed NAs.
    !is.na(score),
    boro != "Missing"
  ) %>%
  group_by(camis, boro) %>%
  # One row per restaurant (camis) and borough; drop the grouping so the
  # result handed to plot_ly() is a plain, ungrouped tibble.
  summarise(score_per_rest_by_boro = mean(score), .groups = "drop") %>%
  plot_ly(
    x = ~boro, y = ~score_per_rest_by_boro, type = "box",
    color = ~boro, alpha = 0.5
  ) %>%
  layout(
    title = "The Distribution of Inspection Score",
    xaxis = list(title = "Borough"),
    yaxis = list(title = "Each Restaurant's Mean Score (2015-2017)")
  )
## `summarise()` has grouped output by 'camis'. You can override using the `.groups` argument.
# Bar chart of letter-grade counts per borough, derived from the inspection
# score, with `year` supplied as the animation frame for ggplotly().
grade_plot_by_boro <- rest_inspec %>%
  janitor::clean_names() %>%
  # `=` (not `<-`): with `<-` mutate() creates a new, oddly named column and
  # `inspection_date` stays POSIXct, which breaks the Date comparison below.
  mutate(inspection_date = as.Date(inspection_date)) %>%
  filter(
    inspection_date >= as.Date("2014-01-01"),
    !is.na(score),
    score >= 0,
    boro != "Missing"
  ) %>%
  mutate(
    year = year(inspection_date),
    # Cumulative thresholds: 0-13 is "A", 14-27 is "B", 28 and above is "C".
    # Written without gaps so no non-negative score falls through to NA.
    Grade = case_when(
      score <= 13 ~ "A",
      score <= 27 ~ "B",
      TRUE ~ "C"
    )
  ) %>%
  # count() returns an ungrouped tibble, so no grouping leaks into the plot.
  count(boro, Grade, year, name = "Count") %>%
  ggplot(aes(boro, Count, fill = Grade, frame = year)) +
  geom_col(position = position_dodge2())
## `summarise()` has grouped output by 'boro', 'grade'. You can override using the `.groups` argument.
# Convert the ggplot bar chart into an interactive plotly widget.
grade_plot_by_boro %>%
  ggplotly()
# Count rows per violation code, most frequent first.
rest_inspec %>%
  janitor::clean_names() %>%
  group_by(violation_code) %>%
  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  count(sort = TRUE)
## # A tibble: 100 × 2
## # Groups: violation_code [100]
## violation_code n
## <chr> <int>
## 1 10F 55228
## 2 08A 40258
## 3 04L 27594
## 4 02G 27235
## 5 06C 26135
## 6 06D 25912
## 7 10B 22365
## 8 04N 20128
## 9 02B 19562
## 10 04M 8952
## # … with 90 more rows
# Monthly counts of violation codes 08A and 04L, drawn as one line per code.
rest_inspec %>%
  filter(violation_code %in% c("08A", "04L")) %>%
  # Group by year-month (as a "%Y-%m" string) and violation code.
  group_by(Date = format(inspection_date, "%Y-%m"), violation_code) %>%
  summarise(count = n()) %>%
  ungroup() %>%
  mutate(
    # Fix the level order (08A first) and attach readable labels in one step.
    violation_code = factor(
      violation_code,
      levels = c("08A", "04L"),
      labels = c("Facility not vermin proof", "Evidence of mice or live mice")
    )
  ) %>%
  plot_ly(
    x = ~Date, y = ~count,
    color = ~violation_code, type = "scatter", mode = "lines"
  )
## `summarise()` has grouped output by 'Date'. You can override using the `.groups` argument.
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# List the column names of the inspection data.
rest_inspec %>%
  names()
## [1] "action" "boro" "building"
## [4] "camis" "critical_flag" "cuisine_description"
## [7] "dba" "inspection_date" "inspection_type"
## [10] "phone" "record_date" "score"
## [13] "street" "violation_code" "violation_description"
## [16] "zipcode" "grade" "grade_date"
# Print a per-column summary of rest_inspec (column types, missing counts,
# and basic distribution statistics).
skimr::skim(rest_inspec)
Data summary

|                          |             |
|:-------------------------|:------------|
| Name                     | rest_inspec |
| Number of rows           | 397584      |
| Number of columns        | 18          |
| _______________________  |             |
| Column type frequency:   |             |
| character                | 12          |
| numeric                  | 3           |
| POSIXct                  | 3           |
| ________________________ |             |
| Group variables          | None        |
Variable type: character

| skim_variable         | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|:----------------------|----------:|--------------:|----:|----:|------:|---------:|-----------:|
| action                |      1117 |          1.00 |  32 | 131 |     0 |        5 |          0 |
| boro                  |         0 |          1.00 |   5 |  13 |     0 |        6 |          0 |
| building              |       130 |          1.00 |   1 |  10 |     0 |     7267 |          0 |
| critical_flag         |         0 |          1.00 |   8 |  14 |     0 |        3 |          0 |
| cuisine_description   |         0 |          1.00 |   4 |  64 |     0 |       85 |          0 |
| dba                   |       334 |          1.00 |   2 |  86 |     0 |    21018 |          0 |
| inspection_type       |      1117 |          1.00 |  25 |  59 |     0 |       34 |          0 |
| phone                 |         3 |          1.00 |  10 |  12 |     0 |    25261 |          0 |
| street                |         5 |          1.00 |   3 |  40 |     0 |     3326 |          0 |
| violation_code        |      6316 |          0.98 |   3 |   3 |     0 |       99 |          0 |
| violation_description |      6890 |          0.98 |  27 | 360 |     0 |       93 |          0 |
| grade                 |    201937 |          0.49 |   1 |  14 |     0 |        6 |          0 |
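Variable type: numeric

| skim_variable | n_missing | complete_rate |        mean |         sd |       p0 |      p25 |      p50 |      p75 |     p100 | hist  |
|:--------------|----------:|--------------:|------------:|-----------:|---------:|---------:|---------:|---------:|---------:|:------|
| camis         |         0 |          1.00 | 44534756.00 | 4277136.67 | 30075445 | 41227319 | 41622444 | 50011150 | 50071063 | ▁▁▇▁▅ |
| score         |     22642 |          0.94 |       18.93 |      13.00 |       -2 |       11 |       15 |       24 |      151 | ▇▂▁▁▁ |
| zipcode       |         5 |          1.00 |    10675.34 |     598.72 |    10001 |    10022 |    10468 |    11229 |    11697 | ▇▂▁▆▃ |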
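Variable type: POSIXct

| skim_variable   | n_missing | complete_rate | min                 | max                 | median              | n_unique |
|:----------------|----------:|--------------:|:--------------------|:--------------------|:--------------------|---------:|
| inspection_date |         0 |          1.00 | 1900-01-01 00:00:00 | 2017-10-17 00:00:00 | 2016-02-03 00:00:00 |     1420 |
| record_date     |         0 |          1.00 | 2017-10-19 06:00:49 | 2017-10-19 06:00:59 | 2017-10-19 06:00:49 |        3 |
| grade_date      |    204287 |          0.49 | 2012-05-01 00:00:00 | 2017-10-17 00:00:00 | 2016-02-17 00:00:00 |     1331 |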